library(dplyr)

#### Load the non-geographic survey data. The original note here named
#### healthsurveyload05022019-nongeo-main-smm-ACTIVE.R, presumably the script
#### that produced this .Rdata file -- TODO confirm.
#### NOTE(review): `sdat`, used below, is presumably defined by this load.
load("kffsurvey/kffgeography/kff-joint-nogeo-05062019.Rdata")

### load geocodes (generated by KFFgeocoding05012019.R)
### NOTE(review): `geodta`, used below, is presumably defined by this load.
load("kffsurvey/kffgeography/joint-geocodes-05022019.Rdata")

### Copy the lower-case id column to PSRAID, the name used as a merge key below.
geodta$PSRAID <- geodta$psraid

### County crosswalk; the columns this script reads from it are
### WFIPS, NEW_NFIPS, NUMBER and IDALT.
fips.xwalk <- read.csv("kffsurvey/kffgeography/geo-subset-09172018HF.csv")

### Eyeball the crosswalk: distinct (NEW_NFIPS, WFIPS) pairs, sorted by
### NEW_NFIPS, then print their count and the first 30 rows.
dta.sub <- fips.xwalk[, c("NEW_NFIPS", "WFIPS")]
dta.sub <- dta.sub[!duplicated(dta.sub), , drop = FALSE]
dta.sub2 <- dta.sub[order(dta.sub[["NEW_NFIPS"]]), ]
dim(dta.sub2)
dta.sub2[seq_len(30), ]

### Tabulate survey NUMBERs that appear in only one of the two inputs.
### Original author's notes on this check (meaning not documented here):
###   161 = early April; not entered
###   160
###   80
table(geodta$NUMBER[is.na(match(geodta$NUMBER, sdat$NUMBER))])
table(sdat$NUMBER[is.na(match(sdat$NUMBER, geodta$NUMBER))])

### Inner join of survey rows and geocodes on respondent id + survey number.
gdta <- merge(x = sdat, y = geodta, by = c("PSRAID", "NUMBER"))

### Rows of survey 137, kept in `test` for interactive inspection.
test <- gdta[gdta$NUMBER == 137, ]

### identify geocode availability by variable, survey
### Builds one row per survey NUMBER holding the count of non-missing values
### in each geographic column (ZIP, ZIPCODE, NFIPS, WFIPS), then prints the
### table sorted by NUMBER.
numbers <- unique(gdta$NUMBER)
### matrix() (not as.matrix(), which ignores the dimension arguments) so the
### table is preallocated at its full size: length(numbers) rows x 5 columns.
type.mat <- as.data.frame(matrix(NA, length(numbers), 5))
### seq_along() so an empty `numbers` yields zero iterations.
for (i in seq_along(numbers)) {
  num <- numbers[i]
  dta.sub <- subset(gdta, NUMBER == num)
  type.mat[i, 1] <- numbers[i]
  ### `$` is kept deliberately: a column that is absent yields NULL and
  ### therefore a count of 0 instead of an error.
  type.mat[i, 2] <- sum(!is.na(dta.sub$ZIP))
  type.mat[i, 3] <- sum(!is.na(dta.sub$ZIPCODE))

  type.mat[i, 4] <- sum(!is.na(dta.sub$NFIPS))
  type.mat[i, 5] <- sum(!is.na(dta.sub$WFIPS))
}
colnames(type.mat) <- c("NUMBER", "ZIP", "ZIPCODE", "NFIPS", "WFIPS")
type.mat[order(type.mat[, 1]), ]

### ZIPJ is the ZIP join key: a copy of the survey's ZIPCODE column.
gdta$ZIPJ <- gdta$ZIPCODE



### load ZIP 2010
### NOTE(review): `dtaZfinal`, used below, is presumably defined by this load
### and is expected to carry ZIPJ and STCOFIPS columns -- confirm.
load("kffsurvey/kffgeography/zipswcounty2010-09142018.Rdata")

#dtaZ$ZIPJ <- dtaZ$ZIP
### Left join on ZIPJ: every survey row is kept, with ZIP-table columns
### attached where the ZIP is found. TRUE is spelled out because `T` is an
### ordinary variable that can be reassigned.
gdta.z <- merge(gdta, dtaZfinal, by = "ZIPJ", all.x = TRUE)
### Agreement check: rows where the survey NFIPS equals / differs from the
### ZIP-derived STCOFIPS (rows with a missing value on either side are skipped).
sum(gdta.z$NFIPS == gdta.z$STCOFIPS, na.rm = TRUE)
sum(!(gdta.z$NFIPS == gdta.z$STCOFIPS), na.rm = TRUE)

### Sequential row id for the merged data; seq_len() stays valid even when
### the merge returns zero rows (1:0 would produce c(1, 0)).
gdta.z$IDALT <- seq_len(nrow(gdta.z))

gdta.z.sub <- subset(gdta.z, select = c("IDALT", "NUMBER", "WFIPS", "STCOFIPS", "ZIPJ"))

### Eyeball the merged data: distinct (STCOFIPS, upper-cased WFIPS) pairs in
### STCOFIPS order; print their count and the first 20 rows.
dta.sub <- subset(gdta.z, select = c("STCOFIPS", "WFIPS"))
dta.sub[["WFIPS"]] <- as.character(toupper(dta.sub[["WFIPS"]]))
dta.sub2 <- dta.sub[order(dta.sub[["STCOFIPS"]]), ]
dta.sub2 <- dta.sub2[!duplicated(dta.sub2), , drop = FALSE]
dim(dta.sub2)
dta.sub2[seq_len(20), ]

### Working copy of the ZIP-merged survey data. The filter to rows with a
### missing ZIPJ is commented out, so every row is kept.
gdta.z.na <- gdta.z#[gdta.z$ZIPJ %in% c(NA),]
### Upper-case character WFIPS, so it can be compared with the crosswalk's
### WFIPS (which is upper-cased the same way below).
gdta.z.na$WFIPS <- as.character(toupper(gdta.z.na$WFIPS))

### Working copy of the crosswalk. The original heading here read
### "remove obs with known ZIP code", but that filter is commented out,
### so no rows are removed.
fips.xwalk2 <- fips.xwalk#[fips.xwalk$ZIPJ %in% c(NA),]
fips.xwalk2$WFIPS <- as.character(fips.xwalk2$WFIPS)

### Rows whose WFIPS is one of three byte-escaped spellings of a
### "Do?a Ana County, NM" label (presumably mis-encoded n-with-tilde
### variants -- TODO confirm against the CSV's encoding).
idx10 <- which(fips.xwalk2$WFIPS %in% c("Do\xb1a Ana County, NM","Do\xe5\xb1a Ana County, NM","Do\x8c\xb1a Ana County, NM"))
### Earlier hard-coded row indices, kept for reference only:
#c(71713,71714,71716,71717,71721,71723,71734,
#	71736,71738,71748,71749,71753,71757,71762,71772,71774,71776,
#	86649,89220,89900,94277,94975,97369,103065,103439,105268)
### Replace those spellings with a plain-ASCII label before upper-casing.
fips.xwalk2$WFIPS[idx10] <- "Dona Ana County, NM"
fips.xwalk2$WFIPS <- as.character(toupper(fips.xwalk2$WFIPS))

### Disabled: the same relabelling applied to the survey data by row index.
#gdta.z.na$WFIPS[c(71713,71714,71716,71717,71721,71723,71734,
#	71736,71738,71748,71749,71753,71757,71762,71772,71774,71776,
#	86649,89220,89900,94277,94975,97369,103065,103439,105268)] <- "Dona Ana County, NM"

### Keep only the crosswalk columns needed for the lookup and the checks below.
fips.xwalk3 <- fips.xwalk2[, c("WFIPS", "NEW_NFIPS", "NUMBER", "IDALT")]
### Crosswalk rows whose WFIPS label does / does not occur in the survey data.
sum(fips.xwalk3$WFIPS %in% gdta.z.na$WFIPS)
sum(is.na(match(fips.xwalk3$WFIPS, gdta.z.na$WFIPS)))

### Distinct IDALT values on each side, and crosswalk rows whose IDALT is
### present in the survey data.
sum(!duplicated(gdta.z.na$IDALT))
sum(!duplicated(fips.xwalk3$IDALT))
sum(fips.xwalk3$IDALT %in% gdta.z.na$IDALT)

### The same three counts for the survey NUMBER.
sum(!duplicated(gdta.z.na$NUMBER))
sum(!duplicated(fips.xwalk3$NUMBER))
sum(fips.xwalk3$NUMBER %in% gdta.z.na$NUMBER)

### NUMBER was only needed for the checks above; drop it from the lookup table.
fips.xwalk3[["NUMBER"]] <- NULL
summary(gdta.z.na$STCOFIPS)
### Positions of survey rows with no ZIP-derived county code, and their count.
idx.na <- which(gdta.z.na$STCOFIPS %in% NA)
na.length <- length(idx.na)

### For every survey row without a ZIP-derived STCOFIPS (positions in idx.na),
### look up its WFIPS label in the crosswalk and store a matching NEW_NFIPS
### code in STCOFIPSNEW. Rows with a missing WFIPS, or with no crosswalk
### match, keep STCOFIPSNEW = NA.
gdta.z.na$STCOFIPSNEW <- NA
### seq_along() so that an empty idx.na means zero iterations; 1:0 would
### iterate over c(1, 0) and fail on a zero-length `if` condition.
for (i in seq_along(idx.na)) {
  idx1 <- idx.na[i]
  wfips <- gdta.z.na$WFIPS[idx1]
  if (!is.na(wfips)) {
    xwalk.idx <- which(fips.xwalk3$WFIPS == wfips)
    tt <- table(fips.xwalk3$NEW_NFIPS[xwalk.idx])
    if (length(tt) > 0) {
      ### table() lists the codes in sorted order, so this takes the first
      ### code in that order, not the most frequent one.
      ### NOTE(review): confirm that is intended when a label maps to
      ### several codes.
      gdta.z.na$STCOFIPSNEW[idx1] <- as.numeric(names(tt)[1])
    }
  }
}

### Final merged data set: start from the working copy, then fill missing
### STCOFIPS values from the crosswalk-derived STCOFIPSNEW. The summaries
### before and after show how many values were filled.
gdta.m <- gdta.z.na

summary(gdta.m$STCOFIPS)
fips.missing <- gdta.m$STCOFIPS %in% NA
gdta.m$STCOFIPS[fips.missing] <- gdta.m$STCOFIPSNEW[fips.missing]
summary(gdta.m$STCOFIPS)

### Rows that have a county code from at least one of the two sources.
sum(!(gdta.m$STCOFIPS %in% NA & gdta.m$STCOFIPSNEW %in% NA))


### Distinct (STCOFIPS, WFIPS) pairs after filling, sorted by STCOFIPS.
dta.sub <- gdta.m[, c("STCOFIPS", "WFIPS")]
dta.sub <- dta.sub[!duplicated(dta.sub), , drop = FALSE]
dta.sub2 <- dta.sub[order(dta.sub[["STCOFIPS"]]), ]
dim(dta.sub2)

### Spot-check a random sample of rows (100, or all rows if there are fewer).
### Capping the size avoids sample()'s error when fewer than 100 rows exist;
### with 100+ rows this draws the same indices as sample(1:n, 100).
rs <- sample.int(nrow(gdta.m), min(100, nrow(gdta.m)))
gdta.m[rs, c("STCOFIPS", "WFIPS")]


### Write the merged, county-coded data set to disk.
save(gdta.m,file="kffsurvey/kffgeography/kff-geocodes-merge-05062019.Rdata")
